#!/usr/bin/python3
# -*- coding: utf-8 -*-
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup


class NgaDetailSpider:
    """Print the posts of an NGA forum thread, following "next page" links.

    Attributes:
        headers: HTTP headers sent with every request (a desktop User-Agent).
        page: 1-based number of the page currently being printed.
        cookie: Raw ``Cookie`` header string sent with every request.
        timeout: Seconds to wait for the server before a request fails.
    """

    # Site root used to resolve the (possibly relative) "next page" links.
    BASE_URL = "https://bbs.nga.cn"

    def __init__(self, cookie=None, timeout=30):
        """Set up request headers, cookie and the page counter.

        Args:
            cookie: Optional raw ``Cookie`` header string
                (``"name=value; name2=value2"``). When omitted, the
                built-in default below is used.
            timeout: Seconds passed to ``requests.get`` as its timeout.
        """
        self.headers = {
            "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_0) AppleWebKit/537.36 (KHTML, like Gecko) "
                          "Chrome/70.0.3538.110 Safari/537.36 "
        }
        self.page = 1
        self.timeout = timeout
        # NOTE(review): this default looks like a captured logged-in browser
        # session. Credentials should not live in source control; prefer
        # passing `cookie` explicitly.
        self.cookie = "taihe_bi_sdk_uid=1b7956ba96a3d5712889f88af9b1984d; " \
                      "UM_distinctid=1780f64d40d13e-038eb18ccee1ed-53e356a-1fa400-1780f64d40e95d; " \
                      "taihe=4dbdbd86986cdf4d06a583c18790ee4c; " \
                      "UM_distinctid=1787cc30e0635a-07f220fb9b26cd-53e356a-1fa400-1787cc30e07cc6; " \
                      "ngaPassportUid=60556366; ngaPassportUrlencodedUname=daixu_y; " \
                      "ngaPassportCid=X94e27dsvr29diljh3k77kss3l4b1e3bptarldfd; " \
                      "taihe_bi_sdk_session=b40c2bc8c247b16cf9d91343c309a6a1; " \
                      "CNZZDATA30043604=cnzz_eid%3D1785528018-1608790865-https%253A%252F%252Fbbs.nga.cn%252F%26ntime" \
                      "%3D1617152497; bbsmisccookies=%7B%22pv_count_for_insad%22%3A%7B0%3A-34%2C1%3A1617210004%7D%2C" \
                      "%22insad_views%22%3A%7B0%3A1%2C1%3A1617210004%7D%2C%22uisetting%22%3A%7B0%3A%22b%22%2C1" \
                      "%3A1617154487%7D%7D; _cnzz_CV30043604=forum%7Cfid-343809%7C0; " \
                      "lastpath=/thread.php?fid=-7&page=1&lite=js&noprefix; lastvisit=1617175961; " \
                      "ngacn0comUserInfo=daixu_y%09daixu_y%0939%0939%09%0911%09116400%094%090%090%0961_47%2C53_30; " \
                      "ngacn0comUserInfoCheck=cd368a2b4e8987f3192b75b56c363c30; ngacn0comInfoCheckTime=1617175961 "
        if cookie is not None:
            self.cookie = cookie

    @staticmethod
    def _parse_cookie(cookie):
        """Turn a raw ``Cookie`` header string into a ``{name: value}`` dict.

        Each pair is split on its FIRST ``=`` only, so values that themselves
        contain ``=`` (e.g. a URL with a query string) stay intact.
        Surrounding whitespace is stripped, fragments without ``=`` are
        skipped, and a repeated name keeps its last value.
        """
        jar = {}
        for fragment in cookie.split(";"):
            name, sep, value = fragment.strip().partition("=")
            if name and sep:
                jar[name] = value
        return jar

    def get_content(self, url):
        """Fetch ``url`` with the spider's headers and cookies.

        Args:
            url: Absolute URL to request.

        Returns:
            The raw response body as ``bytes``.
        """
        response = requests.get(
            url=url,
            headers=self.headers,
            cookies=self._parse_cookie(self.cookie),
            # Without a timeout a stalled connection would block forever.
            timeout=self.timeout,
        )
        return response.content

    def run(self, url="https://bbs.nga.cn/read.php?tid=26706744"):
        """Crawl a thread starting at ``url`` and print all of its posts.

        Args:
            url: First page of the thread; defaults to the thread the script
                was originally written for.
        """
        # 1. Send the request and get the response body.
        content = self.get_content(url)

        # 2. Extract and print the data, following later pages.
        self.get_detail(content)

    def _print_page(self, content):
        """Print the posts of one page and return the next page's URL.

        Args:
            content: Raw page body as ``bytes``.

        Returns:
            Absolute URL of the next page, or ``None`` when the page has no
            pager element, no "next page" link, or the link has no ``href``.
        """
        # Decode as gb18030 (as the original code did); replace undecodable
        # bytes so one malformed character cannot abort the whole crawl.
        html = content.decode('gb18030', errors='replace')
        soup = BeautifulSoup(html, 'lxml')

        print('-----------第' + str(self.page) + '页-----------')
        for post in soup.find_all(class_='postcontent ubbcode'):
            print(post.get_text())

        pager = soup.find(id='pagebbtm')
        if pager is None:
            return None
        next_link = pager.find('a', title='下一页')
        if next_link is None:
            return None
        href = next_link.get('href')
        if not href:
            return None
        # urljoin copes with both "/read.php?..." and "read.php?..." hrefs.
        return urljoin(self.BASE_URL, href)

    def get_detail(self, content):
        """Print the page in ``content`` and every page that follows it.

        Pagination is followed in a loop rather than by recursion, so long
        threads cannot exhaust the interpreter's recursion limit.
        ``self.page`` is incremented once per additional page fetched.

        Args:
            content: Raw body (``bytes``) of the first page to print.
        """
        while True:
            next_url = self._print_page(content)
            if next_url is None:
                return
            print(next_url)
            content = self.get_content(next_url)
            self.page = self.page + 1

    def get_next(self, url):
        """Fetch ``url`` as the next page and print it plus all later pages.

        Args:
            url: Absolute URL of the page to continue from.
        """
        content = self.get_content(url)

        self.page = self.page + 1
        self.get_detail(content)

if __name__ == '__main__':
    # Script entry point: build a spider and crawl its default thread.
    NgaDetailSpider().run()
